# -*- coding: UTF-8 -*-
import pandas as pd
import numpy as np
from sklearn.cluster import KMeans
import json

# --- Load -------------------------------------------------------------------
# Read the address/coordinate table. The first row of the file is skipped and
# the three columns are named explicitly.
locaddr = pd.read_csv(
    'shanghai4月21日addresslocation.csv',
    names=['addr', 'lng', 'lat'],
    skiprows=1,
)

# --- Prepare ----------------------------------------------------------------
# Work on an independent copy from which every row containing a missing value
# (in any of the three columns) has been removed.
locations = locaddr.copy().dropna()

# Only the two coordinate columns are fed to the clustering step.
location = locations[['lng', 'lat']]
data = location.values

# --- Cluster ----------------------------------------------------------------
# K-means with 10 clusters and k-means++ initialisation; fit() returns the
# fitted estimator, so construction and fitting are chained.
loc_cluster = KMeans(n_clusters=10, init='k-means++').fit(data)
centers = loc_cluster.cluster_centers_

# Assign each row to its nearest fitted centre.
labels = loc_cluster.predict(data)

# --- Report and save --------------------------------------------------------
# Attach the cluster id of each row as a new 'labels' column.
locations['labels'] = labels

print(locations.head())
print(centers)

# Persist the labelled table (the DataFrame index is written as well).
locations.to_csv('shanghai4月21日addresslocationclustered.csv')

